Using gurobipy-pandas, Databricks, and Gurobi Instant Cloud
Gurobi Development Team
(
pd.read_sql(
"select * from shifts",
connection,
parse_dates=["start_time", "end_time"]
)
.sample(7)
)| id | start_time | end_time | staff_count | qualification_id | |
|---|---|---|---|---|---|
| 1 | 1 | 2023-05-02 08:00:00 | 2023-05-02 16:00:00 | 2 | 1 |
| 23 | 23 | 2023-05-10 08:00:00 | 2023-05-10 16:00:00 | 1 | 0 |
| 25 | 25 | 2023-05-12 08:00:00 | 2023-05-12 16:00:00 | 1 | 0 |
| 20 | 20 | 2023-05-07 08:00:00 | 2023-05-07 16:00:00 | 1 | 0 |
| 12 | 12 | 2023-05-13 08:00:00 | 2023-05-13 16:00:00 | 5 | 1 |
| 10 | 10 | 2023-05-11 08:00:00 | 2023-05-11 16:00:00 | 4 | 1 |
| 8 | 8 | 2023-05-09 08:00:00 | 2023-05-09 16:00:00 | 2 | 1 |
(
pd.read_sql(
"select * from availability",
connection,
parse_dates=["from_time", "to_time"],
)
.sample(7)
)| id | from_time | to_time | staff_id | |
|---|---|---|---|---|
| 2 | 2 | 2023-05-01 | 2023-06-01 | 60128 |
| 7 | 7 | 2023-05-03 | 2023-05-11 | 60131 |
| 6 | 6 | 2023-05-15 | 2023-06-01 | 60130 |
| 4 | 4 | 2023-05-10 | 2023-06-01 | 60129 |
| 1 | 1 | 2023-05-04 | 2023-06-01 | 60127 |
| 8 | 8 | 2023-05-01 | 2023-05-02 | 60132 |
| 9 | 9 | 2023-05-04 | 2023-06-01 | 60132 |
This is really abstract …
The important part is the data!
| Staff | Shift |
|---|---|
| i1 | j1 |
| i1 | j2 |
| i2 | j1 |
| Shift | Count |
|---|---|
| i1 | n1 |
| i2 | n2 |
| i3 | n3 |
| Staff | Shift1 | Shift2 |
|---|---|---|
| i1 | j1 | j2 |
| i1 | j2 | j3 |
# Feasible (staff, shift) pairs: a staff member may work a shift only if they
# hold the shift's required qualification, an availability interval covers the
# whole shift window, and the staff member is active.
# (Standard SQL equality is `=`; SQLite also accepts `==`, but `=` is portable.)
query = """
SELECT
    shifts.id as shift_id,
    staff_qualifications.staff_id as staff_id
FROM shifts
INNER JOIN staff_qualifications ON (
    staff_qualifications.qualification_id = shifts.qualification_id
)
INNER JOIN availability ON (
    availability.staff_id = staff_qualifications.staff_id
    AND shifts.start_time >= availability.from_time
    AND shifts.end_time <= availability.to_time
)
INNER JOIN staff on (
    staff_qualifications.staff_id = staff.id
)
WHERE staff.active = 1
"""
# Result: we only record a possible assignment when a staff member is
# qualified, available, and active:
| shift_id | staff_id | |
|---|---|---|
| 9 | 0 | 60126 |
| 134 | 25 | 60132 |
| 58 | 4 | 60128 |
| 114 | 22 | 60131 |
This covers our ‘possible assignments’ input table for the model
query = """
SELECT id as shift_id, staff_count
FROM shifts
WHERE start_time BETWEEN '2023-04-01T00:00:00Z' and '2023-05-30T00:00:00Z'
"""
staff_required = pd.read_sql(query, connection, index_col="shift_id")
staff_required.sample(4)| staff_count | |
|---|---|
| shift_id | |
| 7 | 2 |
| 23 | 1 |
| 0 | 3 |
| 10 | 4 |
This covers our ‘shift requirements’ input table for the model
query = """
SELECT
s1.id as shift1_id,
s2.id as shift2_id,
staff.id as staff_id
FROM shifts s1
INNER JOIN shifts s2
INNER JOIN staff
ON s2.start_time >= s1.start_time
AND s2.start_time <= DATETIME(s1.end_time, '+8 hours')
AND s1.id != s2.id
"""
shift_conflicts = pd.read_sql(query, connection)
shift_conflicts.sample(3)| shift1_id | shift2_id | staff_id | |
|---|---|---|---|
| 181 | 25 | 11 | 60132 |
| 104 | 14 | 0 | 60132 |
| 118 | 16 | 2 | 60132 |
This covers our ‘conflicting assignments’ input table for the model
# Build the assignment model: load the three prepared input tables and create
# a Gurobi model.  (The original extraction fused two statements onto one
# line; they are split here.)
import gurobipy as gp
from gurobipy import GRB
import pandas as pd
import gurobipy_pandas as gppd

gppd.set_interactive()

feasible_assignments = pd.read_feather("feasible_assignments.feather")
staff_required = pd.read_feather("staff_required.feather")
shift_conflicts = pd.read_feather("shift_conflicts.feather")

env = gp.Env()
model = gp.Model(env=env)
# Variables are added with obj=1.0, so maximizing counts filled assignments.
model.ModelSense = GRB.MAXIMIZE
assign = gppd.add_vars(
model,
feasible_assignments.set_index(["staff_id", "shift_id"]),
obj=1.0,
vtype=GRB.BINARY,
name="assign",
)
assign.head()staff_id shift_id
60126 14 <gurobi.Var assign[60126,14]>
15 <gurobi.Var assign[60126,15]>
16 <gurobi.Var assign[60126,16]>
17 <gurobi.Var assign[60126,17]>
18 <gurobi.Var assign[60126,18]>
Name: assign, dtype: object
# Cap the number of people assigned to each shift at its required headcount
# (one constraint per shift_id, aligned by index with staff_required).
constr_requires = gppd.add_constrs(
    model,
    assign.groupby("shift_id").sum(),
    GRB.LESS_EQUAL,
    staff_required["staff_count"],
    name="staffing",
)
constr_requires.head()
shift_id
0 <gurobi.Constr staffing[0]>
1 <gurobi.Constr staffing[1]>
2 <gurobi.Constr staffing[2]>
3 <gurobi.Constr staffing[3]>
4 <gurobi.Constr staffing[4]>
Name: staffing, dtype: object
df_conflict_vars = (
shift_conflicts
.join(assign.rename("assign1"), on=["staff_id", "shift1_id"])
.join(assign.rename("assign2"), on=["staff_id", "shift2_id"])
.dropna()
)
df_conflict_vars.head()| shift1_id | shift2_id | staff_id | assign1 | assign2 | |
|---|---|---|---|---|---|
| 0 | 0 | 14 | 60126 | <gurobi.Var assign[60126,0]> | <gurobi.Var assign[60126,14]> |
| 2 | 0 | 14 | 60128 | <gurobi.Var assign[60128,0]> | <gurobi.Var assign[60128,14]> |
| 3 | 0 | 14 | 60129 | <gurobi.Var assign[60129,0]> | <gurobi.Var assign[60129,14]> |
| 4 | 0 | 14 | 60130 | <gurobi.Var assign[60130,0]> | <gurobi.Var assign[60130,14]> |
| 6 | 0 | 14 | 60132 | <gurobi.Var assign[60132,0]> | <gurobi.Var assign[60132,14]> |
constr_conflicts = df_conflict_vars.gppd.add_constrs(
model,
"assign1 + assign2 <= 1",
name="conflict",
)
constr_conflicts.head()| shift1_id | shift2_id | staff_id | assign1 | assign2 | conflict | |
|---|---|---|---|---|---|---|
| 0 | 0 | 14 | 60126 | <gurobi.Var assign[60126,0]> | <gurobi.Var assign[60126,14]> | <gurobi.Constr conflict[0]> |
| 2 | 0 | 14 | 60128 | <gurobi.Var assign[60128,0]> | <gurobi.Var assign[60128,14]> | <gurobi.Constr conflict[2]> |
| 3 | 0 | 14 | 60129 | <gurobi.Var assign[60129,0]> | <gurobi.Var assign[60129,14]> | <gurobi.Constr conflict[3]> |
| 4 | 0 | 14 | 60130 | <gurobi.Var assign[60130,0]> | <gurobi.Var assign[60130,14]> | <gurobi.Constr conflict[4]> |
| 6 | 0 | 14 | 60132 | <gurobi.Var assign[60132,0]> | <gurobi.Var assign[60132,14]> | <gurobi.Constr conflict[6]> |
Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (mac64[arm] - Darwin 23.6.0 23G93)
CPU model: Apple M3 Pro
Thread count: 11 physical cores, 11 logical processors, using up to 11 threads
Optimize a model with 176 rows, 148 columns and 444 nonzeros
Model fingerprint: 0xbd1deb8b
Variable types: 0 continuous, 148 integer (148 binary)
Coefficient statistics:
Matrix range [1e+00, 1e+00]
Objective range [1e+00, 1e+00]
Bounds range [1e+00, 1e+00]
RHS range [1e+00, 5e+00]
Found heuristic solution: objective 58.0000000
Presolve removed 167 rows and 134 columns
Presolve time: 0.00s
Presolved: 9 rows, 14 columns, 28 nonzeros
Variable types: 0 continuous, 14 integer (14 binary)
Root relaxation: cutoff, 0 iterations, 0.00 seconds (0.00 work units)
Explored 1 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)
Thread count was 11 (of 11 available processors)
Solution count 1: 58
Optimal solution found (tolerance 1.00e-04)
Best objective 5.800000000000e+01, best bound 5.800000000000e+01, gap 0.0000%
assignments = (
assign.gppd.X.reset_index().query("assign > 0.9")
[["staff_id", "shift_id"]]
)
assignments.to_feather("assignments.feather")
assignments.sample(5)| staff_id | shift_id | |
|---|---|---|
| 77 | 60129 | 24 |
| 29 | 60127 | 3 |
| 102 | 60130 | 5 |
| 92 | 60130 | 20 |
| 30 | 60127 | 4 |
query = """
SELECT
name, start_time, end_time
FROM assignments
INNER JOIN staff on staff.id == assignments.staff_id
INNER JOIN shifts on shifts.id == assignments.shift_id
"""
pd.read_sql(query, connection, parse_dates=["start_time", "end_time"]).sample(8)| name | start_time | end_time | |
|---|---|---|---|
| 44 | Vincent | 2023-05-03 08:00:00 | 2023-05-03 16:00:00 |
| 20 | Matsumi | 2023-05-08 08:00:00 | 2023-05-08 16:00:00 |
| 25 | Matsumi | 2023-05-04 08:00:00 | 2023-05-04 16:00:00 |
| 47 | Vincent | 2023-05-11 08:00:00 | 2023-05-11 16:00:00 |
| 24 | Matsumi | 2023-05-03 08:00:00 | 2023-05-03 16:00:00 |
| 3 | Siva | 2023-05-03 08:00:00 | 2023-05-03 16:00:00 |
| 43 | Vincent | 2023-05-10 08:00:00 | 2023-05-10 16:00:00 |
| 31 | Matsumi | 2023-05-13 08:00:00 | 2023-05-13 16:00:00 |
query = """
SELECT * FROM (
SELECT id, start_time, end_time, IfNull(assigned, 0) as assigned,
staff_count as required, qualification_id
FROM ( SELECT shift_id, count(staff_id) as assigned
FROM assignments
GROUP BY shift_id )
RIGHT JOIN shifts on shifts.id == shift_id
) WHERE assigned < required
"""
pd.read_sql(query, connection, parse_dates=["start_time", "end_time"])| id | start_time | end_time | assigned | required | qualification_id | |
|---|---|---|---|---|---|---|
| 0 | 11 | 2023-05-12 08:00:00 | 2023-05-12 16:00:00 | 4 | 5 | 1 |
| 1 | 12 | 2023-05-13 08:00:00 | 2023-05-13 16:00:00 | 4 | 5 | 1 |
| 2 | 13 | 2023-05-14 08:00:00 | 2023-05-14 16:00:00 | 4 | 5 | 1 |
| 3 | 25 | 2023-05-12 08:00:00 | 2023-05-12 16:00:00 | 0 | 1 | 0 |
| 4 | 26 | 2023-05-13 08:00:00 | 2023-05-13 16:00:00 | 0 | 1 | 0 |
| 5 | 27 | 2023-05-14 08:00:00 | 2023-05-14 16:00:00 | 0 | 1 | 0 |
Try out the workflow yourself on Databricks!
github.com/Gurobi/gurobipy-pandas/tree/main/webinar/2409-gppd-etl
Q&A
©️ Gurobi Optimization